Simple dotplot

qstart <- 1:100
tstart <- 1:100

plot(qstart, tstart)
title( main = "Identity" )
abline( a = 0, b = 1, lty = 3, col = "#1E90FF" )

cbind(qstart, tstart)[1:7, ]
##      qstart tstart
## [1,]      1      1
## [2,]      2      2
## [3,]      3      3
## [4,]      4      4
## [5,]      5      5
## [6,]      6      6
## [7,]      7      7
tstart2 <- tstart
tstart2 <- max(tstart2) - tstart2
plot(qstart, tstart2)
abline( a = 0, b = 1, lty = 3, col = "#1E90FF" )
title( main = "Inverted sequence" )

cbind(qstart, tstart2)[1:7, ]
##      qstart tstart2
## [1,]      1      99
## [2,]      2      98
## [3,]      3      97
## [4,]      4      96
## [5,]      5      95
## [6,]      6      94
## [7,]      7      93
tstart2 <- tstart
tstart2[20:60] <- tstart2[60:20]
plot(qstart, tstart2)
abline( a = 0, b = 1, lty = 3, col = "#1E90FF" )
title( main = "Inversion (within sequence)" )

tstart2 <- tstart
tstart2[40:length(tstart2)] <- tstart2[40:length(tstart2)] + 20
plot(qstart, tstart2)
abline( a = 0, b = 1, lty = 3, col = "#1E90FF" )
title( main = "Insertion (within sequence)" )
abline( a = 40, b = 0, lty = 3, col = "#B22222" )
abline( a = 60, b = 0, lty = 3, col = "#B22222" )

#abline( v = seq(1, 100, by = 10), lty = 3, col = "#B22222" )
#abline( h = seq(1, 200, by = 20), lty = 3, col = "#B22222" )

tstart2 <- tstart
tstart2[20:60] <- tstart2[20:60] + 20
plot(qstart, tstart2)
abline( a = 0, b = 1, lty = 3, col = "#1E90FF" )
title( main = "Insertion (within sequence)" )
abline( a = 60, b = 0, lty = 3, col = "#B22222" )
abline( a = 80, b = 0, lty = 3, col = "#B22222" )

tstart2 <- tstart
tstart2[40:length(tstart2)] <- tstart2[40:length(tstart2)] + 20
plot(qstart, tstart2)
abline( a = 0, b = 1, lty = 3, col = "#1E90FF" )
title( main = "Insertion with extra alignment" )
#abline( a = 40, b = 0, lty = 3, col = "#B22222" )
#abline( a = 60, b = 0, lty = 3, col = "#B22222" )
points( x = 80:90, y = 10:20)

tstart2 <- tstart
#tstart2[20:60] <- tstart2[60:20]
set.seed(9)
tstart2[20:80] <- sample(tstart2[20:80])
plot(qstart, tstart2)
abline( a = 0, b = 1, lty = 3, col = "#1E90FF" )
title( main = "Spaghetti plate in the middle" )

pafr

https://cran.r-project.org/web/packages/pafr/index.html

library(pafr)
## Loading required package: ggplot2
#my_paf <- read_paf("CBDRx_Purple_Kush.paf")
my_paf <- read_paf("CBDRx_Purple_Kush.paf.gz", include_tags = TRUE)
my_paf
## pafr object with 290411 alignments (792.6Mb)
##  6527 query seqs
##  163 target seqs
##  12 tags: NM, ms, AS, nn, tp, cm, s1, s2, de, zd, rl ...
#
as.data.frame(my_paf)[1:3, 1:23]
##        qname     qlen   qstart     qend strand       tname      tlen    tstart
## 1 CM010790.2 79255070  4540967  4626503      + NC_044374.1  88181582   3677124
## 2 CM010790.2 79255070 23140036 23192664      + NC_044374.1  88181582  56189690
## 3 CM010790.2 79255070 71982825 72027138      + NC_044370.1 104987320 102213668
##        tend nmatch  alen mapq  NM    ms    AS nn tp   cm    s1   s2     de zd
## 1   3762821  85435 85726   60 291 78457 78457  0  P 8198 83929 4879 0.0023  1
## 2  56242439  52353 52788   60 435 43918 43946  0  P 4220 44699 2663 0.0061  3
## 3 102257983  44297 44323   60  26 43680 43584  0  P 4387 44245    0 0.0005  3
##         rl
## 1 15578069
## 2 15578069
## 3 15578069
#as.data.frame(my_paf)[1:3, 1:12]
prim_alignment <- filter_secondary_alignments(my_paf)
nrow(my_paf)
## [1] 290411
nrow(prim_alignment)
## [1] 230879
dotplot(prim_alignment, 
        #order_by = "provided", 
        label_seqs = TRUE, 
        xlab = "Purple Kush", 
        ylab = "CBDRx")

Subset

as.data.frame(prim_alignment)[1:3, 1:12]
##        qname     qlen   qstart     qend strand       tname      tlen    tstart
## 1 CM010790.2 79255070  4540967  4626503      + NC_044374.1  88181582   3677124
## 2 CM010790.2 79255070 23140036 23192664      + NC_044374.1  88181582  56189690
## 3 CM010790.2 79255070 71982825 72027138      + NC_044370.1 104987320 102213668
##        tend nmatch  alen mapq
## 1   3762821  85435 85726   60
## 2  56242439  52353 52788   60
## 3 102257983  44297 44323   60
# table(prim_alignment$tname)
ali <- prim_alignment[grep("NC_044", prim_alignment$tname), ]
nrow(ali)
## [1] 224946
hist(ali$qlen)

ali <- ali[ali$qlen > 2e07, ]
nrow(ali)
## [1] 150680
p <- dotplot(ali, 
             #order_by = "provided", 
             label_seqs = TRUE, 
             xlab = "Purple Kush", 
             ylab = "CBDRx")
p <- p + theme_bw()
p

CHROM3

table(ali$tname)
## 
## NC_044370.1 NC_044371.1 NC_044372.1 NC_044373.1 NC_044374.1 NC_044375.1 
##       15791       13964       16752       16198       16238       16109 
## NC_044376.1 NC_044377.1 NC_044378.1 NC_044379.1 
##       11738       17526       15078       11286
chrom3 <- ali[ali$tname == "NC_044372.1", ]

p <- dotplot(chrom3, 
             #order_by = "provided", 
             label_seqs = TRUE, 
             xlab = "Purple Kush", 
             ylab = "CBDRx")
p <- p + theme_bw()
p

sort(table(ali$qname), decreasing = TRUE)
## 
## CM010790.2 CM010794.2 CM010796.2 CM010792.2 CM010793.2 CM010797.2 CM010795.2 
##      19448      18750      17831      16977      16956      14329      14244 
## CM010799.2 CM010798.2 CM010791.2 
##      13952      11366       6827
#chrom3a <- chrom3[chrom3$qname == "CM010790.2", ]
#chrom3a <- chrom3[chrom3$qname == "CM010794.2", ]
#chrom3a <- chrom3[grep("CM010793.2|CM010794.2|CM010795.2", chrom3$qname), ]
#chrom3a <- chrom3[grep("CM010790.2|CM010792.2|CM010793.2|CM010794.2", chrom3$qname), ]
#chrom3a <- chrom3[grep("CM010790.2|CM010791.2|CM010792.2|CM010793.2|CM010794.2", chrom3$qname), ]
chrom3a <- chrom3[grep("CM010790.2|CM010794.2|CM010792.2", chrom3$qname), ]

#chrom3a <- chrom3[chrom3$qname == "CM010791.2", ]

p <- dotplot(chrom3a, 
             #order_by = "provided", 
             label_seqs = TRUE, 
             xlab = "Purple Kush", 
             ylab = "CBDRx")
p <- p + theme_bw()
p <- p + theme( axis.text.x = element_text(angle = 60, hjust = 1) )
p

pointdata <- data.frame(
  xname = c(0, 20e6, 20e6, 0, 100e6),
  ypos = c(0, 0, 20e6, 20e6, 20e6),
  ptyname = c(1, 1, 1, 1, 1)
)

p + geom_point(data = pointdata, 
               mapping = aes(x = xname, y = ypos, shape = factor(ptyname)),
               color = "#B22222", size = 4)

# ggsave(filename = "CBDRx_chrom3_PK_dotplot.tiff", 
#        device = "tiff", 
#        width = 3.25, height = 3.25, units = "in", dpi = 300, 
#        compression = "lzw")
#p + geom_segment()

as.data.frame(chrom3a)[1:3, 1:12]
##          qname     qlen   qstart     qend strand       tname     tlen   tstart
## 88  CM010790.2 79255070 38631731 38652977      - NC_044372.1 94670641 26983832
## 92  CM010790.2 79255070 12394945 12425856      + NC_044372.1 94670641 27176310
## 113 CM010790.2 79255070 47893129 47913001      + NC_044372.1 94670641 23994558
##         tend nmatch  alen mapq
## 88  27005139  21197 21316   60
## 92  27208748  30492 32617   60
## 113 24014452  19782 19911   60
chrom3b <- chrom3a[chrom3a$qname == "CM010794.2", ]

p <- dotplot(chrom3b, 
             #order_by = "provided", 
             label_seqs = TRUE, 
             xlab = "Purple Kush", 
             ylab = "CBDRx")
p <- p + theme_bw()
p <- p + theme( axis.text.x = element_text(angle = 60, hjust = 1) )
p

# p + geom_segment(data = chrom3a, 
#          aes_string(x = "qstart", xend = "qend", y = "tstart", yend = "tend"),
#          size=1.2, colour="#B22222")

line_size <- 1.2
alignment_colour <- "#1E90FF"
p + geom_segment(data = chrom3b[chrom3b$strand == "+", ], 
         aes(x = qstart, xend = qend, y = tstart, yend = tend),
         #size=1.2,
         linewidth=line_size,
         colour=alignment_colour) +
  geom_segment(data = chrom3b[chrom3b$strand=="-",],
        aes(x = qend, xend = qstart, y = tstart, yend = tend),
        size = line_size,
        colour = alignment_colour) 
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

pointdata <- data.frame(
  xname = c(0, 20e6, 20e6, 0),
  ypos = c(0, 0, 20e6, 20e6),
  ptyname = c(1, 1, 1, 1)
)

p + geom_point(data = pointdata, 
               mapping = aes(x = xname, y = ypos, shape = factor(ptyname)),
               color = "#B22222", size = 4)

p + geom_point(data = pointdata, 
               mapping = aes(x = xname, y = ypos, shape = factor(ptyname)),
               color = "#B22222", size = 4) + theme(legend.position="none")

chrom3b
## pafr object with 9355 alignments (36.1Mb)
##  1 query seqs
##  1 target seqs
##  12 tags: NM, ms, AS, nn, tp, cm, s1, s2, de, zd, rl ...
my_df <- as.data.frame(chrom3b)
my_df[ my_df$qstart > 70e6 & my_df$tstart < 10e6, 1:12]
##            qname     qlen   qstart     qend strand       tname     tlen  tstart
## 56427 CM010794.2 78152476 72764325 72769367      + NC_044372.1 94670641 9756412
## 57394 CM010794.2 78152476 75343554 75348186      + NC_044372.1 94670641 3159184
## 64475 CM010794.2 78152476 75342123 75343006      + NC_044372.1 94670641 3157006
## 64605 CM010794.2 78152476 75336176 75338001      + NC_044372.1 94670641 3237442
## 67742 CM010794.2 78152476 75338909 75341018      + NC_044372.1 94670641 3146407
## 68762 CM010794.2 78152476 75334656 75335586      + NC_044372.1 94670641 3235921
## 68849 CM010794.2 78152476 75338488 75339725      + NC_044372.1 94670641 3239680
## 69133 CM010794.2 78152476 75025700 75026457      - NC_044372.1 94670641 2513295
##          tend nmatch alen mapq
## 56427 9761456   5001 5044   60
## 57394 3164279   4548 5108    3
## 64475 3157889    867  884    2
## 64605 3239215   1729 1830   60
## 67742 3148530   2019 2131   60
## 68762 3236854    906  933   60
## 68849 3241099   1195 1423   53
## 69133 2514053    742  759   22